Aim

This script describes an example analysis for cyTOF data in R.

The base of this script was developed beginning of 2016 and only slighlty changed since. Thus it is likely that there are better ways to dot his nowadays! Alternative resources for cyTOF analysis are:

Load dependencies

if(!require('bbRtools')){
  devtools::install_git('https://github.com/BodenmillerGroup/bbRtools')
}
## Loading required package: bbRtools

If anything is not installed, install it via install.packages. For Bioconductor packages, follow the bioconductor instructions.

Settings

Set your project specific folders and settings

# input files: paths to your input and output folder

# a folder containing FCS files to be analysised
fcs_folder = './exampledata/'
out_folder = '/home/vitoz/Data/rseminaroutput/'

# the random seed
rand_seed = 1234
# Should the script use only the cells subsampled at the tsne step?
subsampled_cells_only = F

## load tsne: should the script try to load an existing tsne?
load_tsne = F
load_phenograph = F
load_flowsom = F

# filename of the tsne saved by the script
# choose the filename of the tsne such that it reflects the tsne setting
fn_tsne = file.path(out_folder, '20170222_tsne_out.rdata')
fn_phenograph = file.path(out_folder, '20170222_pheno_out.rdata')
fn_flowsom = file.path(out_folder, '20170222_flowsom_out.rdata')

# For plotting only: removes outliers with values higher than xx% of all cells
censor_val = 0.999

# point size for plots
size = 0.1

# define excluded channels
crap_channels = c("Time" ,"Event_length" ,"X89Y_BC1" , "X96Ru_Vol1" , "X98Ru_Vol2","X99Ru_Vol3" , "X100Ru_Vol4" , "X101Ru_Vol5" , "X102Ru_Vol6" , "X103Rh_BC2" , "X104Ru_Vol7" , "X105Pd_BC3" , "X106Pd_BC4" , "X108Pd_BC5" , "X110Pd_BC6" , "X113In_BC7" , "X115In_BC8" , "X120Sn" , "X131Xe" , "X138Ba" , "X140Ce_Beads" , "X150Nd_CD68" , "X151Eu" ,  "X156Gd"  ,"X157Gd" , "X173Yb_CD3" , "X176Yb_CD45" , "X190BCKG" , "X191Ir_DNA1" , "X193Ir_DNA2" , "X195Pt" , "X196Pt" , "X208Pb" , "X209Bi_BC9" ,"MCB102" ,  "MCB104"   ,"MCB105"   ,"MCB106",   "MCB108" ,  "MCB110"  , "MCB113" ,  "MCB115"  ,"DNA191" ,  "DNA193"  , "Live194" ,
                  "Live195" , "beadDist" ,"barcode" ,"Beads140" 
)

Start the script

Load the data

# load data

fcs_files = list.files(fcs_folder,pattern = '*.fcs')
dat = bbRtools::loadConvertMultiFCS(fileDir = fcs_folder)
# give each cell an own ID
dat[, id := paste(1:.N, .BY), by=condition]

setkey(dat, id)
# 'melt' data: make a column 'channel' with all the channels and a column 'counts' with all the counts
dat = melt.data.table(dat, id.vars=c('condition','id'), variable.name='channel', value.name = 'counts' , variable.factor = FALSE)

Create a metadata table

Here we simply extract metadata from the filename - however it is entirely possible to create such a table in excel, save it as a csv and load it here.

Using ‘merge’ the metadata can allways be added whenever needed! This is extremely helpfull, e.g. when looking at clinical data.

dat_meta = data.table(condition=dat[, unique(condition)])
dat_meta[, ':='(
  mdm = bbRtools::getInfoFromFileList(condition,strPos = 1),
  plate = bbRtools::getInfoFromFileList(condition,strPos = 2),
  well = bbRtools::getInfoFromFileList(condition,strPos = 3),
  stimulation = bbRtools::getInfoFromFileList(condition,strPos = c(4,5))
)]

Clean and transform the data

# remove the NA counts

dat = subset(dat, !is.na(counts))


### calculate transformed counts ####
dat[ , counts_transf := asinh(counts/5)]
dat[, counts_scaled := counts/quantile(counts, 0.99), by=.(channel)]
dat[counts_scaled > 1, counts_scaled := 1]

Check if there are only ‘good’ channels selected

Otherwise add the channel to the ‘crap’ channels list

# make a list of good channels
good_channels = unique(dat$channel)[!unique(dat$channel) %in% crap_channels]
print(good_channels)
##  [1] "CD209"   "CD64"    "CD38"    "CD68"    "CD36"    "CD71"    "CD155"  
##  [8] "CD206"   "CD13"    "CD204"   "CD123"   "CD11b"   "CD40"    "Slamf7" 
## [15] "CD197"   "CD273"   "CD304"   "CD166"   "CD82"    "CD274"   "CD119"  
## [22] "CXCR4"   "CD87"    "CD120b"  "CD4"     "CD32"    "CD16"    "CD14"   
## [29] "CD163"   "CD169"   "CD86"    "HLA.ABC" "CD81"    "CD88"    "HLA.DR" 
## [36] "CD54"

Tsne has a problem with duplicated cells, so remove them

# remove duplicate in live sample before running t-SNE
dat[,sum_all := sum(counts[channel %in% good_channels]), by= id]

# remove all 0 cells
dat = subset(dat, sum_all != 0)
# equal to: dat = dat[sum_all !=0,]

Run the tsne analysis

If save tsne is activated,

###### Run Tsne #####
# make a list
setkey(dat, id)
# number of cells per condition
print(dat[ , .(nCells = length(unique(id))), by=condition])
##                              condition nCells
## 1:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs   8665
## 2:      MDM4x_P10_B3_IC_1_CD68 pos.fcs  10016
## 3:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs   8574
## 4:      MDM4x_P10_B5_GC_1_CD68 pos.fcs   2991
## 5: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs   3042
print(dat[ , .(nCells = length(unique(id)))])
##    nCells
## 1:  33288
set.seed(rand_seed)

# choose the tsne channels

tsne_channels = good_channels

if (load_tsne & file.exists(fn_tsne)){
  load(fn_tsne)
} else {
  tsne = bbRtools::calcTSNE(dat, tsne_channels, value_var = 'counts_transf',
                            id_var = 'id', group_var='condition', scale = F, subsample_groups=10000, subsample_mode='unequal', multicore=20)
  
  save(tsne, file=fn_tsne)
  
}
##  [1] "Using no_dims = 2, perplexity = 30.000000, and theta = 0.500000; distance_precomputed = 0 "
##  [2] "Computing input similarities..."                                                           
##  [3] "Building tree..."                                                                          
##  [4] " - point 10000 of 33272"                                                                   
##  [5] " - point 20000 of 33272"                                                                   
##  [6] " - point 30000 of 33272"                                                                   
##  [7] "Done in 2.5877 seconds (sparsity = 0.004252)!"                                             
##  [8] "Learning embedding..."                                                                     
##  [9] "Iteration 50: error is 110.011779 (50 iterations in 5.294261 seconds)"                     
## [10] "Iteration 100: error is 110.011632 (50 iterations in 5.765812 seconds)"                    
## [11] "Iteration 150: error is 100.790237 (50 iterations in 5.847215 seconds)"                    
## [12] "Iteration 200: error is 96.544576 (50 iterations in 5.702226 seconds)"                     
## [13] "Iteration 250: error is 5.481384 (50 iterations in 5.856825 seconds)"                      
## [14] "Iteration 300: error is 4.538533 (50 iterations in 4.971295 seconds)"                      
## [15] "Iteration 350: error is 4.291247 (50 iterations in 4.715139 seconds)"                      
## [16] "Iteration 400: error is 4.140086 (50 iterations in 4.834828 seconds)"                      
## [17] "Iteration 450: error is 4.035494 (50 iterations in 5.052949 seconds)"                      
## [18] "Iteration 500: error is 3.958142 (50 iterations in 4.869244 seconds)"                      
## [19] "Iteration 550: error is 3.897061 (50 iterations in 4.770489 seconds)"                      
## [20] "Iteration 600: error is 3.847240 (50 iterations in 5.008296 seconds)"                      
## [21] "Iteration 650: error is 3.805561 (50 iterations in 4.808290 seconds)"                      
## [22] "Iteration 700: error is 3.770491 (50 iterations in 4.991799 seconds)"                      
## [23] "Iteration 750: error is 3.739777 (50 iterations in 5.024691 seconds)"                      
## [24] "Iteration 800: error is 3.712395 (50 iterations in 5.585260 seconds)"                      
## [25] "Iteration 850: error is 3.688375 (50 iterations in 5.490099 seconds)"                      
## [26] "Iteration 900: error is 3.666562 (50 iterations in 5.786320 seconds)"                      
## [27] "Iteration 950: error is 3.646962 (50 iterations in 5.606620 seconds)"                      
## [28] "Iteration 999: error is 3.629561 (50 iterations in 5.597064 seconds)"                      
## [29] "Fitting performed in 105.578723 seconds."

If selected remove all cells not present in the tsne

if (subsampled_cells_only){
  ########## subset the data to only include the subsampled cells
  setkey(dat, id)
  dat = dat[tsne$Y$id ,]
}

This defines a colormap with lots and lots of colors

qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))

Rseminar: example of ggplot2

marker = good_channels[1]
subset(dat, channel == marker)[tsne$Y] 
##                                  condition
##     1:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs
##     2:      MDM4x_P10_B3_IC_1_CD68 pos.fcs
##     3:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs
##     4:      MDM4x_P10_B5_GC_1_CD68 pos.fcs
##     5: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs
##    ---                                    
## 33268:      MDM4x_P10_B3_IC_1_CD68 pos.fcs
## 33269:      MDM4x_P10_B3_IC_1_CD68 pos.fcs
## 33270:      MDM4x_P10_B3_IC_1_CD68 pos.fcs
## 33271:      MDM4x_P10_B3_IC_1_CD68 pos.fcs
## 33272:      MDM4x_P10_B3_IC_1_CD68 pos.fcs
##                                           id channel     counts
##     1:     1 MDM4x_P10_B2_IL4_1_CD68 pos.fcs   CD209 27.1289749
##     2:      1 MDM4x_P10_B3_IC_1_CD68 pos.fcs   CD209  7.4247875
##     3:    1 MDM4x_P10_B4_IL10_1_CD68 pos.fcs   CD209  6.8900189
##     4:      1 MDM4x_P10_B5_GC_1_CD68 pos.fcs   CD209  2.2226202
##     5: 1 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs   CD209  3.3131831
##    ---                                                         
## 33268:   9995 MDM4x_P10_B3_IC_1_CD68 pos.fcs   CD209  0.6206078
## 33269:   9996 MDM4x_P10_B3_IC_1_CD68 pos.fcs   CD209  0.0000000
## 33270:   9997 MDM4x_P10_B3_IC_1_CD68 pos.fcs   CD209  3.6047390
## 33271:   9998 MDM4x_P10_B3_IC_1_CD68 pos.fcs   CD209  0.0000000
## 33272:   9999 MDM4x_P10_B3_IC_1_CD68 pos.fcs   CD209  9.8142004
##        counts_transf counts_scaled   sum_all      bh_V1      bh_V2
##     1:     2.3926975   0.205122345 2600.5371   1.345933 -24.432269
##     2:     1.1863901   0.056138864 1373.5257  25.981259   2.711590
##     3:     1.1251302   0.052095475 2113.7750  -4.959740  12.619178
##     4:     0.4310509   0.016805245 1737.7808 -18.168732  15.275452
##     5:     0.6217888   0.025050997 2706.4836 -20.130232  14.366106
##    ---                                                            
## 33268:     0.1238050   0.004692419  574.2401   6.078643  -6.179152
## 33269:     0.0000000   0.000000000 2643.3191  22.915985   8.332606
## 33270:     0.6697432   0.027255453 1192.3619   6.286984   1.480164
## 33271:     0.0000000   0.000000000 1981.0100  28.179845   7.400114
## 33272:     1.4268925   0.074205229 2259.0834  22.919115  14.545141
p = ggplot(subset(dat, channel == marker)[tsne$Y] , aes(x=bh_V1, y=bh_V2, color=counts_transf))+
  geom_density2d()+
  geom_point(alpha=0.5, size=1, aes(shape=condition))+
  scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts')+
  ggtitle(marker)

p

Look at an example channel mapped on the tsne

#plot the t-SNE Map by marker
print(good_channels)
##  [1] "CD209"   "CD64"    "CD38"    "CD68"    "CD36"    "CD71"    "CD155"  
##  [8] "CD206"   "CD13"    "CD204"   "CD123"   "CD11b"   "CD40"    "Slamf7" 
## [15] "CD197"   "CD273"   "CD304"   "CD166"   "CD82"    "CD274"   "CD119"  
## [22] "CXCR4"   "CD87"    "CD120b"  "CD4"     "CD32"    "CD16"    "CD14"   
## [29] "CD163"   "CD169"   "CD86"    "HLA.ABC" "CD81"    "CD88"    "HLA.DR" 
## [36] "CD54"
# you can replace good_channels[1] with any marker name
marker = good_channels[1]
p =subset(dat, channel == marker)[tsne$Y] %>%
  ggplot(aes(x=bh_V1, y=bh_V2, color=censor_dat(counts,censor_val)))+
  geom_point(alpha=0.5, size=1)+
  scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts')+
  ggtitle(marker)

p

Rseminar: example of plutting multiple markers

plot_markers = c("CD38" ,   "CD68"  ,  "CD36"  ,  "CD71" )

for (marker in plot_markers){
  p =subset(dat, channel == marker)[tsne$Y] %>%
    ggplot(aes(x=bh_V1, y=bh_V2, color=censor_dat(counts,censor_val)))+
    geom_point(alpha=0.5, size=1)+
    scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts')+
    ggtitle(marker)
  
  print(p)
}

Rseminar: example of plutting multiple markers with faceting

plot_markers = c("CD38" ,   "CD68"  ,  "CD36"  ,  "CD71" )


p =subset(dat, channel %in% plot_markers)[tsne$Y] %>%
  ggplot(aes(x=bh_V1, y=bh_V2, color=censor_dat(counts,censor_val)))+
  facet_wrap(~channel)+
  geom_point(alpha=0.5, size=1)+
  scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts')+
  ggtitle('Multiple markers')

print(p)

Make an overview plot of all the channels on the tsne

#plot a t-SNE Map for all the markers
dat[, c_counts := bbRtools::censor_dat(counts_transf,censor_val), by=channel]
dat[, c_counts_scaled := c_counts, by=channel]
dat[, c_counts_scaled := (c_counts_scaled/max(c_counts_scaled)), by=channel]
dat[c_counts_scaled < 0, c_counts_scaled := 0, by=channel]

p = subset(dat, channel %in% good_channels)[tsne$Y] %>%
  ggplot(aes(x=bh_V1, y=bh_V2, color=c_counts_scaled))+
  facet_wrap(~channel, scales = "free", ncol = 10)+
  geom_point(alpha=0.5, size=0.5)+
  scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts')+
  ggtitle('')+
  theme(strip.background = element_blank(),
        axis.line=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        panel.background=element_blank(),
        panel.border=element_blank(),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        plot.background=element_blank()) 

p

Phenograph analysis

if (load_phenograph & file.exists(fn_phenograph)){
  load(fn_phenograph)
} else {
  # with joining with the tsne data, only the cells used for tsne are used
  cluster_pheno = bbRtools::do_phenograph(dat, good_channels, valuevar='counts_transf', idvar = 'id', k = 10, seed = rand_seed)
  
  save(cluster_pheno, file=fn_phenograph)
  
}
## Warning in fun(libname, pkgname): couldn't connect to display ":0"
## Run Rphenograph starts:
##   -Input data of 33288 rows and 36 columns
##   -k is set to 10
##   Finding nearest neighbors...DONE ~ 199.865 s
##   Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 1.048 s
##   Build undirected graph from the weighted links...DONE ~ 0.891 s
##   Run louvain clustering on the graph ...DONE ~ 1.154 s
## Run Rphenograph DONE, took a total of 202.958s.
##   Return a community class
##   -Modularity value: 0.8383463 
##   -Number of clusters: 19
# with joining with the tsne data, only the cells used for tsne are used

if (load_flowsom & file.exists(fn_flowsom)){
  load(fn_flowsom)
} else {
  # with joining with the tsne data, only the cells used for tsne are used
  cluster_flowsom = bbRtools::do_flowsom(dat, good_channels, valuevar='counts_transf', idvar = 'id', k = 10, seed = rand_seed)
  
  save(cluster_flowsom, file=fn_flowsom)
  
}
## Building SOM
## Mapping data to SOM
## Building MST
## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered

Map phenograph on the tsne

p = subset(dat, !duplicated(id))[tsne$Y][cluster_pheno] %>%
  ggplot(aes(x=bh_V1, y=bh_V2))+
  geom_point(size=0.3, alpha=1, aes(color=cluster, shape=condition))+
  #scale_colour_brewer(palette="Paired")+
  scale_color_manual(values = col_vector[10:70])+
  ggtitle('Conditions')+
  #stat_density2d(alpha=0.5, group=1, color='black')+
  guides(color=guide_legend(override.aes=list(size=5)))+
  theme(strip.background = element_blank(),
        panel.background=element_rect(fill='white', colour = 'black'),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        plot.background=element_blank(),
        legend.key = element_blank())
p
## Warning: Removed 36 rows containing missing values (geom_point).

# Plot a heatmap of the phenograph clusters

Convert the data into a matrix to do a heatmap

summary_dat = dat[cluster_pheno][ channel %in% good_channels ,list(
  median_val = median(counts),
  mean_val= mean(counts),
  cell_cluster=.N
), by=.(channel,cluster)]

hm_dat = dcast.data.table(data =summary_dat, formula = 'cluster ~ channel',
                          value.var = 'median_val')

# save column
trownames = hm_dat$cluster

# convert to a matrix
hm_dat = as.matrix(hm_dat[,-1,with=F])
row.names(hm_dat) = trownames

Plot the heatmap with z-scoring per marker

cols = rev(brewer.pal(11,'Spectral'))
cmap = colorRampPalette(cols)


tdist = as.dist(1-cor(t(hm_dat), method="spearman"))
#tdist = dist((hm_dat), method="euclidean")
hr <- hclust(tdist, method="ward.D")
co_r <- order.optimal(tdist, hr$merge)
hr$merge = co_r$merge
hr$order = co_r$order

order_heatmap_zscored = row.names(hm_dat)[hr$order]

tdist = as.dist(1-cor((hm_dat), method="spearman"))
#tdist = dist(t(hm_dat), method="euclidean")
hc <- hclust(tdist, method="ward.D")
co_c <- order.optimal(tdist, hc$merge)
hc$merge = co_c$merge
hc$order = co_c$order

#z-score
p_dat = scale(hm_dat)

# censor z-score at 2
p_dat[p_dat > 2] =2
p_dat[p_dat < -2] =-2


heatmap.2(p_dat,
          scale ='none',
          trace = "none",
          col=cmap(75),
          Rowv=as.dendrogram(hr),
          Colv=as.dendrogram(hc),
          density.info ='none',
          #Colv =NA,
          #keyorient=2,                                                  
          #sepwidth = c(0,0),
          cexRow=0.6,
          cexCol=0.6,
          margins=c(4,4),
          xlab = 'Conditions',
          ylab ='Cluster',
          main = 'Cluster markers, z-scored'
)

Heatmap with 0-1 scaling on the transformed data

p_dat = asinh(hm_dat/5)
p_dat = t(t(p_dat) / (apply(p_dat, 2, function(x) quantile(x, 0.99))))
tdist = as.dist(1-cor(t(p_dat), method="spearman"))

hr <- hclust(tdist, method="ward.D")
co_r <- order.optimal(tdist, hr$merge)
hr$merge = co_r$merge
hr$order = co_r$order

order_heatmap_01 = row.names(hm_dat)[hr$order]

tdist = as.dist(1-cor((hm_dat), method="spearman"))
hc <- hclust(tdist, method="ward.D")
co_c <- order.optimal(tdist, hc$merge)
hc$merge = co_c$merge
hc$order = co_c$order

heatmap.2(p_dat,
          scale ='none',
          trace = "none",
          col=cmap(75),
          Rowv=as.dendrogram(hr),
          #RowSideColors=sel_col_vector[group_levels],
          #dendrogram='column',
          Colv=as.dendrogram(hc),
          density.info ='none',
          #Colv =NA,
          #keyorient=2,                                                  
          xlab = 'Conditions',
          ylab ='Cluster',
          main = 'Cluster markers, 0-1 scaled',
          #sepwidth = c(0,0),
          cexRow=0.6,
          cexCol=0.6,
          margins=c(4,4)
          
          #comments = data.frame(names = row.names(tab_Prot))
)

Plot a heatmap with cluster frequencies on the side

Prepare the data

##############################distribution of cluster per condtition in a table


dat_cluster_condition = dat[cluster_pheno][, .(cluster_mean = mean(counts_transf), ncells=.N),by=.(condition, channel, cluster)]
dat_cluster_condition[, id2:=paste(cluster, condition)]
setkey(dat_cluster_condition, 'id2')

cluster_dat = subset(dat_cluster_condition, !duplicated(id2))

cluster_dat[, frac_cluster := ncells/sum(ncells), by=condition]
cluster_dat[, frac_cells := ncells/sum(ncells), by=cluster]

# this will order the clusters the same as in the heatmap above
cluster_dat[, cluster_s := factor(as.character(cluster),levels = order_heatmap_zscored)]

Plot the fraction of cells per cluster as a heatmap

p <- ggplot(cluster_dat, aes(x=cluster_s, y=condition)) + 
  geom_tile(aes(fill = frac_cells), colour = "white") + 
  scale_fill_gradient(low = "white", high = "steelblue")+
  theme(panel.background = element_blank())+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
p

Plot the cluster composition

#distribution of cluster per condtition in a staked bar graph

p <- ggplot(cluster_dat, aes(x=cluster_s, y=frac_cells, fill=condition)) + 
  geom_bar(stat='identity')+
  scale_fill_manual(values = col_vector)+
  coord_flip()+
  theme(panel.background = element_blank())+
  ggtitle('cluster composition')
p

Plot the condition composition

p <- ggplot(cluster_dat, aes(x=condition, y=frac_cluster, fill=cluster)) + 
  geom_bar(stat='identity')+
  scale_fill_manual(values = col_vector)+
  coord_flip()+
  theme(panel.background = element_blank())+
  ggtitle('Condition composition')
p

Distribution of cell number per cluster

#distribution of cells per cluster
cluster_sum =cluster_dat[, .(cells_cluster = sum(ncells)), by=cluster_s]
p <- ggplot(cluster_sum, aes(x=cluster_s, y=cells_cluster)) + 
  geom_bar(stat='identity')+
  coord_flip()+
  theme(panel.background = element_blank())+
  ggtitle('distribution of cell numbers')
p

Distribution of conditions per cluster

###
cluster_dat[, frac_cells_condition := ncells/sum(ncells), by=condition]
cluster_dat[, frac_cells_scaled := frac_cells_condition/sum(frac_cells_condition), by=cluster]
cluster_dat[, cluster_s := factor(as.character(cluster),levels = row.names(hm_dat)[hr$order])]

p <- merge(cluster_dat, dat_meta,by = 'condition') %>%
  ggplot(aes(x=cluster_s, y=frac_cells_scaled, fill=stimulation)) + 
  geom_bar(stat='identity')+
  scale_fill_manual(values = col_vector)+
  coord_flip()+
  theme(panel.background = element_blank())+
  ggtitle('Distribution of conditions per cluster')
p

p <- merge(cluster_dat, dat_meta,by = 'condition') %>%
  ggplot(aes(x=cluster_s, y=frac_cells_scaled, fill=well)) + 
  geom_bar(stat='identity')+
  scale_fill_manual(values = col_vector)+
  coord_flip()+
  theme(panel.background = element_blank())+
  ggtitle('Distribution of conditions per cluster')
p

merge(cluster_dat, dat_meta,by = 'condition')
##                               condition channel cluster cluster_mean
##  1:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time       1     15.55951
##  2:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time      10     15.27975
##  3:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time      12     15.04920
##  4:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time      13     14.93846
##  5:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time      15     15.65354
##  6:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time      16     15.41448
##  7:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time      17     14.96891
##  8:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time       2     14.77665
##  9:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time       3     14.56104
## 10:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time       4     14.87216
## 11:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time       5     15.21564
## 12:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time       6     14.96687
## 13:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time       7     14.85837
## 14:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time       8     15.31478
## 15:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time       9     15.08621
## 16:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs    Time    <NA>     14.91948
## 17:      MDM4x_P10_B3_IC_1_CD68 pos.fcs    Time       1     14.69155
## 18:      MDM4x_P10_B3_IC_1_CD68 pos.fcs    Time      11     15.05925
## 19:      MDM4x_P10_B3_IC_1_CD68 pos.fcs    Time      13     15.84435
## 20:      MDM4x_P10_B3_IC_1_CD68 pos.fcs    Time      14     14.69747
## 21:      MDM4x_P10_B3_IC_1_CD68 pos.fcs    Time      15     15.12603
## 22:      MDM4x_P10_B3_IC_1_CD68 pos.fcs    Time      16     15.15223
## 23:      MDM4x_P10_B3_IC_1_CD68 pos.fcs    Time      18     14.67412
## 24:      MDM4x_P10_B3_IC_1_CD68 pos.fcs    Time      19     14.88106
## 25:      MDM4x_P10_B3_IC_1_CD68 pos.fcs    Time       6     15.26818
## 26:      MDM4x_P10_B3_IC_1_CD68 pos.fcs    Time    <NA>     15.26710
## 27:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs    Time       1     15.28458
## 28:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs    Time      10     15.24115
## 29:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs    Time      12     14.77313
## 30:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs    Time      13     15.11324
## 31:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs    Time      15     15.49566
## 32:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs    Time      16     15.41977
## 33:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs    Time      17     15.54537
## 34:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs    Time       2     14.59519
## 35:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs    Time       3     14.85595
## 36:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs    Time       5     15.47432
## 37:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs    Time       6     14.86569
## 38:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs    Time       7     14.21040
## 39:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs    Time       8     15.02489
## 40:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs    Time       9     14.84203
## 41:    MDM4x_P10_B4_IL10_1_CD68 pos.fcs    Time    <NA>     15.42618
## 42:      MDM4x_P10_B5_GC_1_CD68 pos.fcs    Time       1     15.21211
## 43:      MDM4x_P10_B5_GC_1_CD68 pos.fcs    Time      10     15.13102
## 44:      MDM4x_P10_B5_GC_1_CD68 pos.fcs    Time      12     14.50188
## 45:      MDM4x_P10_B5_GC_1_CD68 pos.fcs    Time      13     14.84974
## 46:      MDM4x_P10_B5_GC_1_CD68 pos.fcs    Time      16     15.12043
## 47:      MDM4x_P10_B5_GC_1_CD68 pos.fcs    Time       2     14.36221
## 48:      MDM4x_P10_B5_GC_1_CD68 pos.fcs    Time       3     14.69176
## 49:      MDM4x_P10_B5_GC_1_CD68 pos.fcs    Time       6     14.78551
## 50:      MDM4x_P10_B5_GC_1_CD68 pos.fcs    Time       8     15.18509
## 51:      MDM4x_P10_B5_GC_1_CD68 pos.fcs    Time       9     14.98002
## 52:      MDM4x_P10_B5_GC_1_CD68 pos.fcs    Time    <NA>     15.31220
## 53: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs    Time       1     15.18601
## 54: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs    Time      10     15.00187
## 55: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs    Time      12     15.12195
## 56: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs    Time      13     14.96634
## 57: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs    Time      14     15.71903
## 58: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs    Time      16     14.51320
## 59: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs    Time       2     14.70602
## 60: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs    Time       3     14.66166
## 61: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs    Time       5     15.89804
## 62: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs    Time       6     14.91729
## 63: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs    Time       8     14.85298
## 64: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs    Time       9     14.78067
##                               condition channel cluster cluster_mean
##     ncells                                    id2 frac_cluster
##  1:     10      1 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.154068e-03
##  2:     26     10 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 3.000577e-03
##  3:     14     12 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.615695e-03
##  4:     22     13 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 2.538950e-03
##  5:      1     15 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.154068e-04
##  6:      1     16 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.154068e-04
##  7:    130     17 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.500289e-02
##  8:   6188      2 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 7.141373e-01
##  9:      5      3 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 5.770340e-04
## 10:     69      4 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 7.963070e-03
## 11:   2040      5 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 2.354299e-01
## 12:     31      6 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 3.577611e-03
## 13:     99      7 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.142527e-02
## 14:      3      8 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 3.462204e-04
## 15:     19      9 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 2.192729e-03
## 16:      7     NA MDM4x_P10_B2_IL4_1_CD68 pos.fcs 8.078477e-04
## 17:      1       1 MDM4x_P10_B3_IC_1_CD68 pos.fcs 9.984026e-05
## 18:    120      11 MDM4x_P10_B3_IC_1_CD68 pos.fcs 1.198083e-02
## 19:      1      13 MDM4x_P10_B3_IC_1_CD68 pos.fcs 9.984026e-05
## 20:   4638      14 MDM4x_P10_B3_IC_1_CD68 pos.fcs 4.630591e-01
## 21:   2621      15 MDM4x_P10_B3_IC_1_CD68 pos.fcs 2.616813e-01
## 22:     28      16 MDM4x_P10_B3_IC_1_CD68 pos.fcs 2.795527e-03
## 23:    129      18 MDM4x_P10_B3_IC_1_CD68 pos.fcs 1.287939e-02
## 24:   2469      19 MDM4x_P10_B3_IC_1_CD68 pos.fcs 2.465056e-01
## 25:      3       6 MDM4x_P10_B3_IC_1_CD68 pos.fcs 2.995208e-04
## 26:      6      NA MDM4x_P10_B3_IC_1_CD68 pos.fcs 5.990415e-04
## 27:    131     1 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 1.527875e-02
## 28:   1809    10 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 2.109867e-01
## 29:   5300    12 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 6.181479e-01
## 30:      5    13 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 5.831584e-04
## 31:      1    15 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 1.166317e-04
## 32:      5    16 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 5.831584e-04
## 33:      1    17 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 1.166317e-04
## 34:      9     2 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 1.049685e-03
## 35:    142     3 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 1.656170e-02
## 36:      2     5 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 2.332634e-04
## 37:    626     6 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 7.301143e-02
## 38:      3     7 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 3.498950e-04
## 39:     93     8 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 1.084675e-02
## 40:    443     9 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 5.166783e-02
## 41:      4    NA MDM4x_P10_B4_IL10_1_CD68 pos.fcs 4.665267e-04
## 42:    814       1 MDM4x_P10_B5_GC_1_CD68 pos.fcs 2.721498e-01
## 43:    149      10 MDM4x_P10_B5_GC_1_CD68 pos.fcs 4.981612e-02
## 44:     42      12 MDM4x_P10_B5_GC_1_CD68 pos.fcs 1.404213e-02
## 45:     27      13 MDM4x_P10_B5_GC_1_CD68 pos.fcs 9.027081e-03
## 46:     10      16 MDM4x_P10_B5_GC_1_CD68 pos.fcs 3.343363e-03
## 47:      1       2 MDM4x_P10_B5_GC_1_CD68 pos.fcs 3.343363e-04
## 48:   1678       3 MDM4x_P10_B5_GC_1_CD68 pos.fcs 5.610164e-01
## 49:     51       6 MDM4x_P10_B5_GC_1_CD68 pos.fcs 1.705115e-02
## 50:     33       8 MDM4x_P10_B5_GC_1_CD68 pos.fcs 1.103310e-02
## 51:    183       9 MDM4x_P10_B5_GC_1_CD68 pos.fcs 6.118355e-02
## 52:      3      NA MDM4x_P10_B5_GC_1_CD68 pos.fcs 1.003009e-03
## 53:    779  1 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 2.560815e-01
## 54:     73 10 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 2.399737e-02
## 55:     30 12 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 9.861933e-03
## 56:     77 13 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 2.531229e-02
## 57:      1 14 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 3.287311e-04
## 58:      9 16 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 2.958580e-03
## 59:      2  2 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 6.574622e-04
## 60:   1892  3 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 6.219592e-01
## 61:      1  5 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 3.287311e-04
## 62:     45  6 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 1.479290e-02
## 63:     27  8 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 8.875740e-03
## 64:    106  9 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 3.484550e-02
##     ncells                                    id2 frac_cluster
##       frac_cells cluster_s frac_cells_condition frac_cells_scaled   mdm
##  1: 0.0057636888         1         1.154068e-03      0.0021184736 MDM4x
##  2: 0.0126397667        10         3.000577e-03      0.0104258827 MDM4x
##  3: 0.0025993316        12         1.615695e-03      0.0025101391 MDM4x
##  4: 0.1666666667        13         2.538950e-03      0.0675947893 MDM4x
##  5: 0.0003812429        15         1.154068e-04      0.0004406297 MDM4x
##  6: 0.0188679245        16         1.154068e-04      0.0117809707 MDM4x
##  7: 0.9923664122        17         1.500289e-02      0.9922860182 MDM4x
##  8: 0.9980645161         2         7.141373e-01      0.9971494778 MDM4x
##  9: 0.0013451708         3         5.770340e-04      0.0004808159 MDM4x
## 10: 1.0000000000         4         7.963070e-03      1.0000000000 MDM4x
## 11: 0.9985315712         5         2.354299e-01      0.9976185857 MDM4x
## 12: 0.0410052910         6         3.577611e-03      0.0329028332 MDM4x
## 13: 0.9705882353         7         1.142527e-02      0.9702853498 MDM4x
## 14: 0.0192307692         8         3.462204e-04      0.0111318434 MDM4x
## 15: 0.0252996005         9         2.192729e-03      0.0146289618 MDM4x
## 16: 0.3500000000      <NA>         8.078477e-04      0.2808512935 MDM4x
## 17: 0.0005763689         1         9.984026e-05      0.0001832725 MDM4x
## 18: 1.0000000000        11         1.198083e-02      1.0000000000 MDM4x
## 19: 0.0075757576        13         9.984026e-05      0.0026580601 MDM4x
## 20: 0.9997844363        14         4.630591e-01      0.9992905919 MDM4x
## 21: 0.9992375143        15         2.616813e-01      0.9991140639 MDM4x
## 22: 0.5283018868        16         2.795527e-03      0.2853733137 MDM4x
## 23: 1.0000000000        18         1.287939e-02      1.0000000000 MDM4x
## 24: 1.0000000000        19         2.465056e-01      1.0000000000 MDM4x
## 25: 0.0039682540         6         2.995208e-04      0.0027546543 MDM4x
## 26: 0.3000000000      <NA>         5.990415e-04      0.2082590534 MDM4x
## 27: 0.0755043228         1         1.527875e-02      0.0280465498 MDM4x
## 28: 0.8794360719        10         2.109867e-01      0.7330998690 MDM4x
## 29: 0.9840326773        12         6.181479e-01      0.9603525905 MDM4x
## 30: 0.0378787879        13         5.831584e-04      0.0155255012 MDM4x
## 31: 0.0003812429        15         1.166317e-04      0.0004453063 MDM4x
## 32: 0.0943396226        16         5.831584e-04      0.0595300391 MDM4x
## 33: 0.0076335878        17         1.166317e-04      0.0077139818 MDM4x
## 34: 0.0014516129         2         1.049685e-03      0.0014656746 MDM4x
## 35: 0.0382028518         3         1.656170e-02      0.0138001001 MDM4x
## 36: 0.0009789525         5         2.332634e-04      0.0009884380 MDM4x
## 37: 0.8280423280         6         7.301143e-02      0.6714768164 MDM4x
## 38: 0.0294117647         7         3.498950e-04      0.0297146502 MDM4x
## 39: 0.5961538462         8         1.084675e-02      0.3487497223 MDM4x
## 40: 0.5898801598         9         5.166783e-02      0.3447059019 MDM4x
## 41: 0.2000000000      <NA>         4.665267e-04      0.1621897736 MDM4x
## 42: 0.4691642651         1         2.721498e-01      0.4995737597 MDM4x
## 43: 0.0724355858        10         4.981612e-02      0.1730923641 MDM4x
## 44: 0.0077979948        12         1.404213e-02      0.0218158028 MDM4x
## 45: 0.2045454545        13         9.027081e-03      0.2403291532 MDM4x
## 46: 0.1886792453        16         3.343363e-03      0.3412975964 MDM4x
## 47: 0.0001612903         2         3.343363e-04      0.0004668336 MDM4x
## 48: 0.4514393328         3         5.610164e-01      0.4674691054 MDM4x
## 49: 0.0674603175         6         1.705115e-02      0.1568172854 MDM4x
## 50: 0.2115384615         8         1.103310e-02      0.3547414427 MDM4x
## 51: 0.2436750999         9         6.118355e-02      0.4081907405 MDM4x
## 52: 0.1500000000      <NA>         1.003009e-03      0.3486998794 MDM4x
## 53: 0.4489913545         1         2.560815e-01      0.4700779443 MDM4x
## 54: 0.0354885756        10         2.399737e-02      0.0833818842 MDM4x
## 55: 0.0055699963        12         9.861933e-03      0.0153214676 MDM4x
## 56: 0.5833333333        13         2.531229e-02      0.6738924962 MDM4x
## 57: 0.0002155637        14         3.287311e-04      0.0007094081 MDM4x
## 58: 0.1698113208        16         2.958580e-03      0.3020180801 MDM4x
## 59: 0.0003225806         2         6.574622e-04      0.0009180140 MDM4x
## 60: 0.5090126446         3         6.219592e-01      0.5182499787 MDM4x
## 61: 0.0004894763         5         3.287311e-04      0.0013929763 MDM4x
## 62: 0.0595238095         6         1.479290e-02      0.1360484107 MDM4x
## 63: 0.1730769231         8         8.875740e-03      0.2853769917 MDM4x
## 64: 0.1411451398         9         3.484550e-02      0.2324743958 MDM4x
##       frac_cells cluster_s frac_cells_condition frac_cells_scaled   mdm
##     plate well stimulation
##  1:   P10   B2       IL4_1
##  2:   P10   B2       IL4_1
##  3:   P10   B2       IL4_1
##  4:   P10   B2       IL4_1
##  5:   P10   B2       IL4_1
##  6:   P10   B2       IL4_1
##  7:   P10   B2       IL4_1
##  8:   P10   B2       IL4_1
##  9:   P10   B2       IL4_1
## 10:   P10   B2       IL4_1
## 11:   P10   B2       IL4_1
## 12:   P10   B2       IL4_1
## 13:   P10   B2       IL4_1
## 14:   P10   B2       IL4_1
## 15:   P10   B2       IL4_1
## 16:   P10   B2       IL4_1
## 17:   P10   B3        IC_1
## 18:   P10   B3        IC_1
## 19:   P10   B3        IC_1
## 20:   P10   B3        IC_1
## 21:   P10   B3        IC_1
## 22:   P10   B3        IC_1
## 23:   P10   B3        IC_1
## 24:   P10   B3        IC_1
## 25:   P10   B3        IC_1
## 26:   P10   B3        IC_1
## 27:   P10   B4      IL10_1
## 28:   P10   B4      IL10_1
## 29:   P10   B4      IL10_1
## 30:   P10   B4      IL10_1
## 31:   P10   B4      IL10_1
## 32:   P10   B4      IL10_1
## 33:   P10   B4      IL10_1
## 34:   P10   B4      IL10_1
## 35:   P10   B4      IL10_1
## 36:   P10   B4      IL10_1
## 37:   P10   B4      IL10_1
## 38:   P10   B4      IL10_1
## 39:   P10   B4      IL10_1
## 40:   P10   B4      IL10_1
## 41:   P10   B4      IL10_1
## 42:   P10   B5        GC_1
## 43:   P10   B5        GC_1
## 44:   P10   B5        GC_1
## 45:   P10   B5        GC_1
## 46:   P10   B5        GC_1
## 47:   P10   B5        GC_1
## 48:   P10   B5        GC_1
## 49:   P10   B5        GC_1
## 50:   P10   B5        GC_1
## 51:   P10   B5        GC_1
## 52:   P10   B5        GC_1
## 53:   P10   B6     GC_TGFB
## 54:   P10   B6     GC_TGFB
## 55:   P10   B6     GC_TGFB
## 56:   P10   B6     GC_TGFB
## 57:   P10   B6     GC_TGFB
## 58:   P10   B6     GC_TGFB
## 59:   P10   B6     GC_TGFB
## 60:   P10   B6     GC_TGFB
## 61:   P10   B6     GC_TGFB
## 62:   P10   B6     GC_TGFB
## 63:   P10   B6     GC_TGFB
## 64:   P10   B6     GC_TGFB
##     plate well stimulation

an example how to add manual cluster annotations

(These annotations actually do not fit this example dataset - it is just an illustration how it can be done)

fn_clusterannot = './exampledata/example_clusteranotations.csv'

dat_clusterannot = fread(fn_clusterannot)

merge(dat[cluster_pheno], dat_clusterannot, by='cluster') 
##         cluster                        condition
##      1:       1   MDM4x_P10_B5_GC_1_CD68 pos.fcs
##      2:       1   MDM4x_P10_B5_GC_1_CD68 pos.fcs
##      3:       1   MDM4x_P10_B5_GC_1_CD68 pos.fcs
##      4:       1   MDM4x_P10_B5_GC_1_CD68 pos.fcs
##      5:       1   MDM4x_P10_B5_GC_1_CD68 pos.fcs
##     ---                                         
## 823033:       9 MDM4x_P10_B4_IL10_1_CD68 pos.fcs
## 823034:       9 MDM4x_P10_B4_IL10_1_CD68 pos.fcs
## 823035:       9 MDM4x_P10_B4_IL10_1_CD68 pos.fcs
## 823036:       9 MDM4x_P10_B4_IL10_1_CD68 pos.fcs
## 823037:       9 MDM4x_P10_B4_IL10_1_CD68 pos.fcs
##                                           id      channel       counts
##      1:  1000 MDM4x_P10_B5_GC_1_CD68 pos.fcs         Time 6.687669e+06
##      2:  1000 MDM4x_P10_B5_GC_1_CD68 pos.fcs Event_length 3.700000e+01
##      3:  1000 MDM4x_P10_B5_GC_1_CD68 pos.fcs       MCB102 2.864088e+00
##      4:  1000 MDM4x_P10_B5_GC_1_CD68 pos.fcs       MCB104 2.371512e+01
##      5:  1000 MDM4x_P10_B5_GC_1_CD68 pos.fcs       MCB105 4.048273e+03
##     ---                                                               
## 823033: 990 MDM4x_P10_B4_IL10_1_CD68 pos.fcs       DNA193 3.874563e+03
## 823034: 990 MDM4x_P10_B4_IL10_1_CD68 pos.fcs      Live194 2.893113e+02
## 823035: 990 MDM4x_P10_B4_IL10_1_CD68 pos.fcs      Live195 2.729424e+02
## 823036: 990 MDM4x_P10_B4_IL10_1_CD68 pos.fcs     beadDist 5.990276e+01
## 823037: 990 MDM4x_P10_B4_IL10_1_CD68 pos.fcs      barcode 3.000000e+00
##         counts_transf counts_scaled  sum_all   c_counts c_counts_scaled
##      1:    14.7994852    0.33216193 1038.041 14.7994852       0.9302322
##      2:     2.6991616    0.51388889 1038.041  2.6991616       0.7933328
##      3:     0.5453769    0.16090627 1038.041  0.5453769       0.2084160
##      4:     2.2607542    0.01672941 1038.041  2.2607542       0.3440973
##      5:     7.3897554    1.00000000 1038.041  7.3897554       0.9744026
##     ---                                                                
## 823033:     7.3458978    0.82966404 1414.569  7.3458978       0.9622110
## 823034:     4.7512872    0.53272741 1414.569  4.7512872       0.8507562
## 823035:     4.6930540    0.56243807 1414.569  4.6930540       0.8557932
## 823036:     3.1781690    0.59502903 1414.569  3.1781690       0.8446354
## 823037:     0.5688249    0.60000000 1414.569  0.5688249       0.6453846
##         celltype   compartment
##      1: cd12321+   interesting
##      2: cd12321+   interesting
##      3: cd12321+   interesting
##      4: cd12321+   interesting
##      5: cd12321+   interesting
##     ---                       
## 823033:  unknown uninteresting
## 823034:  unknown uninteresting
## 823035:  unknown uninteresting
## 823036:  unknown uninteresting
## 823037:  unknown uninteresting

Whenever needed the cluster annotations can now be added to the cluster data

p <- merge(cluster_dat, dat_clusterannot, by='cluster') %>% 
  ggplot(aes(x=condition, y=frac_cluster, fill=celltype, color=cluster)) + 
  geom_bar(stat='identity')+
  scale_fill_manual(values = col_vector)+
  coord_flip()+
  theme(panel.background = element_blank())+
  ggtitle('Condition composition')
p

Condition metadata and cluster metadata can be even combined simultaneoulsy

p <- merge(cluster_dat, dat_clusterannot, by='cluster') %>%
  merge(dat_meta, by='condition') %>%
  ggplot(aes(x=stimulation, y=frac_cluster, fill=celltype)) + 
  geom_bar(stat='identity')+
  scale_fill_manual(values = col_vector)+
  coord_flip()+
  theme(panel.background = element_blank())+
  ggtitle('Condition composition')
print(p)

ggsave(file.path(out_folder, 'condtion_composition.pdf'),plot=p,  width = 5, height = 5)

Rseminar: Colormap example

p <- merge(cluster_dat, dat_clusterannot, by='cluster') %>%
  merge(dat_meta, by='condition') %>%
  ggplot(aes(x=stimulation, y=frac_cluster, fill=celltype)) + 
  geom_bar(stat='identity')+
  scale_fill_brewer(type = 'qual',palette = 'set1')+
  coord_flip()+
  theme(panel.background = element_blank())+
  ggtitle('Condition composition')
## Warning in pal_name(palette, type): Unknown palette set1
print(p)

additional examples how to use the data

Example how to add the metadata:

merge(dat, dat_meta,by = 'condition')
##                                    condition
##       1:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs
##       2:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs
##       3:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs
##       4:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs
##       5:     MDM4x_P10_B2_IL4_1_CD68 pos.fcs
##      ---                                    
## 1764260: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs
## 1764261: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs
## 1764262: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs
## 1764263: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs
## 1764264: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs
##                                               id      channel      counts
##       1:       1 MDM4x_P10_B2_IL4_1_CD68 pos.fcs         Time 2187.266113
##       2:       1 MDM4x_P10_B2_IL4_1_CD68 pos.fcs Event_length   51.000000
##       3:       1 MDM4x_P10_B2_IL4_1_CD68 pos.fcs       MCB102    1.252187
##       4:       1 MDM4x_P10_B2_IL4_1_CD68 pos.fcs       MCB104  850.113953
##       5:       1 MDM4x_P10_B2_IL4_1_CD68 pos.fcs       MCB105 1341.456665
##      ---                                                                 
## 1764260: 999 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs       DNA193 3209.162109
## 1764261: 999 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs      Live194  221.384903
## 1764262: 999 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs      Live195  150.259659
## 1764263: 999 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs     beadDist   77.466232
## 1764264: 999 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs      barcode    5.000000
##          counts_transf counts_scaled  sum_all  c_counts c_counts_scaled
##       1:     6.7741183  0.0001086367 2600.537 6.7741183      0.42579203
##       2:     3.0179292  0.7083333333 2600.537 3.0179292      0.88702446
##       3:     0.2478907  0.0703486341 2600.537 0.2478907      0.09473156
##       4:     5.8290883  0.5996978752 2600.537 5.8290883      0.88721440
##       5:     6.2852241  0.3498670418 2600.537 6.2852241      0.82876068
##      ---                                                               
## 1764260:     7.1574750  0.6871810869 1425.255 7.1574750      0.93753026
## 1764261:     4.4837396  0.4076501769 1425.255 4.4837396      0.80284961
## 1764262:     4.0963508  0.3096321747 1425.255 4.0963508      0.74698249
## 1764263:     3.4345913  0.7694914306 1425.255 3.4345913      0.91278260
## 1764264:     0.8813736  1.0000000000 1425.255 0.8813736      1.00000000
##            mdm plate well stimulation
##       1: MDM4x   P10   B2       IL4_1
##       2: MDM4x   P10   B2       IL4_1
##       3: MDM4x   P10   B2       IL4_1
##       4: MDM4x   P10   B2       IL4_1
##       5: MDM4x   P10   B2       IL4_1
##      ---                             
## 1764260: MDM4x   P10   B6     GC_TGFB
## 1764261: MDM4x   P10   B6     GC_TGFB
## 1764262: MDM4x   P10   B6     GC_TGFB
## 1764263: MDM4x   P10   B6     GC_TGFB
## 1764264: MDM4x   P10   B6     GC_TGFB

How to summarize with data.table

dat[channel=='barcode', mean(counts)]
## [1] 2.451124
dat[, mean(counts), by=list(channel, condition)]
##           channel                           condition           V1
##   1:         Time     MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.030727e+07
##   2: Event_length     MDM4x_P10_B2_IL4_1_CD68 pos.fcs 4.259469e+01
##   3:       MCB102     MDM4x_P10_B2_IL4_1_CD68 pos.fcs 3.579586e+00
##   4:       MCB104     MDM4x_P10_B2_IL4_1_CD68 pos.fcs 8.768262e+02
##   5:       MCB105     MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.432730e+03
##  ---                                                              
## 261:       DNA193 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 3.257350e+03
## 262:      Live194 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 3.355503e+02
## 263:      Live195 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 2.705967e+02
## 264:     beadDist MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 6.862977e+01
## 265:      barcode MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 5.000000e+00
dat[, .(meancounts=mean(counts),
        n=.N),
    , by=list(channel, condition)]
##           channel                           condition   meancounts    n
##   1:         Time     MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.030727e+07 8665
##   2: Event_length     MDM4x_P10_B2_IL4_1_CD68 pos.fcs 4.259469e+01 8665
##   3:       MCB102     MDM4x_P10_B2_IL4_1_CD68 pos.fcs 3.579586e+00 8665
##   4:       MCB104     MDM4x_P10_B2_IL4_1_CD68 pos.fcs 8.768262e+02 8665
##   5:       MCB105     MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.432730e+03 8665
##  ---                                                                   
## 261:       DNA193 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 3.257350e+03 3042
## 262:      Live194 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 3.355503e+02 3042
## 263:      Live195 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 2.705967e+02 3042
## 264:     beadDist MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 6.862977e+01 3042
## 265:      barcode MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 5.000000e+00 3042
dat[, counts_mean_normalized := counts/mean(counts), by=list(channel, condition)]


dat[, counts_mean_normalized := counts/mean(counts), by=list(channel, condition)]

Example of renaming and deleting

dat[channel == "HLA.ABC", channel := 'HLA-ABC']
# remove a column
# add it as an example
dat[, channel3 := channel]

# remove it
dat[, channel3 := NULL]


#dat = dat[!channel %in% c('Time', 'barcode'),]
sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
## 
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggplot2_3.2.1      cba_0.2-20         proxy_0.4-22      
##  [4] gplots_3.0.1       dtplyr_0.0.3       dplyr_0.8.3       
##  [7] RColorBrewer_1.1-2 Rtsne_0.15         data.table_1.12.2 
## [10] bbRtools_0.6.0    
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.1.0                backports_1.1.4            
##   [3] Hmisc_4.1-1                 VGAM_1.0-6                 
##   [5] RcppEigen_0.3.3.4.0         plyr_1.8.4                 
##   [7] igraph_1.2.4.1              ConsensusClusterPlus_1.42.0
##   [9] lazyeval_0.2.2              sp_1.3-1                   
##  [11] splines_3.4.4               flowCore_1.44.2            
##  [13] digest_0.6.20               foreach_1.4.4              
##  [15] htmltools_0.3.6             gdata_2.18.0               
##  [17] magrittr_1.5                checkmate_1.8.5            
##  [19] cluster_2.0.7-1             doParallel_1.0.11          
##  [21] openxlsx_4.1.0              shinyFiles_0.7.0           
##  [23] Rphenograph_0.99.1.9001     largeVis_0.2.2             
##  [25] matrixStats_0.54.0          xts_0.11-0                 
##  [27] pdist_1.2                   colorspace_1.3-2           
##  [29] rrcov_1.4-4                 ggrepel_0.8.0              
##  [31] haven_1.1.2                 tcltk_3.4.4                
##  [33] crayon_1.3.4                jsonlite_1.5               
##  [35] graph_1.56.0                survival_2.42-6            
##  [37] zoo_1.8-3                   iterators_1.0.10           
##  [39] glue_1.3.1                  gtable_0.3.0               
##  [41] car_3.0-2                   BiocGenerics_0.24.0        
##  [43] DEoptimR_1.0-8              abind_1.4-5                
##  [45] VIM_4.7.0                   scales_1.0.0               
##  [47] mvtnorm_1.0-8               cytofkit_1.10.0            
##  [49] miniUI_0.1.1.1              Rcpp_1.0.2                 
##  [51] xtable_1.8-3                laeken_0.4.6               
##  [53] htmlTable_1.12              foreign_0.8-71             
##  [55] FlowSOM_1.10.0              Formula_1.2-3              
##  [57] stats4_3.4.4                tsne_0.1-3                 
##  [59] vcd_1.4-4                   htmlwidgets_1.2            
##  [61] acepack_1.4.1               pkgconfig_2.0.2            
##  [63] XML_3.98-1.16               nnet_7.3-12                
##  [65] tidyselect_0.2.5            labeling_0.3               
##  [67] rlang_0.4.0                 reshape2_1.4.3             
##  [69] later_0.7.3                 munsell_0.5.0              
##  [71] cellranger_1.1.0            tools_3.4.4                
##  [73] evaluate_0.14               stringr_1.3.1              
##  [75] yaml_2.2.0                  knitr_1.20                 
##  [77] zip_1.0.0                   robustbase_0.93-2          
##  [79] caTools_1.17.1.1            purrr_0.3.2                
##  [81] RANN_2.6.1                  nlme_3.1-137               
##  [83] mime_0.5                    compiler_3.4.4             
##  [85] rstudioapi_0.7              curl_3.2                   
##  [87] e1071_1.7-0                 smoother_1.1               
##  [89] tibble_2.1.3                pcaPP_1.9-73               
##  [91] stringi_1.2.4               forcats_0.3.0              
##  [93] lattice_0.20-35             Matrix_1.2-14              
##  [95] vegan_2.5-2                 permute_0.9-4              
##  [97] pillar_1.4.2                lmtest_0.9-36              
##  [99] bitops_1.0-6                corpcor_1.6.9              
## [101] httpuv_1.4.5                R6_2.4.0                   
## [103] latticeExtra_0.6-28         promises_1.0.1             
## [105] KernSmooth_2.23-15          gridExtra_2.3              
## [107] rio_0.5.10                  Rtsne.multicore_0.0.99     
## [109] codetools_0.2-15            boot_1.3-20                
## [111] colourpicker_1.0            MASS_7.3-50                
## [113] gtools_3.8.1                assertthat_0.2.1           
## [115] destiny_2.6.2               rprojroot_1.3-2            
## [117] withr_2.1.2                 mgcv_1.8-24                
## [119] parallel_3.4.4              hms_0.4.2                  
## [121] rpart_4.1-13                class_7.3-14               
## [123] rmarkdown_1.10              carData_3.0-1              
## [125] TTR_0.23-3                  Biobase_2.38.0             
## [127] shiny_1.1.0                 base64enc_0.1-3